%load_ext pretty_jupyter
# import packages
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
import json
# SPARQL endpoint URL that is passed as the `service` argument to the query helper below.
sparqlep = "http://graph.oceaninfohub.org/blazegraph/namespace/oih/sparql"
from minio import Minio
def publicurls(client, bucket, prefix):
    """
    Collect a download URL for every non-empty object below a prefix.

    The bucket is listed recursively under ``prefix``. Each listed object is
    stat'ed, and objects whose reported size is greater than zero get a URL
    from ``client.presigned_get_object``.

    Args:
        client: object-store client exposing ``list_objects``,
            ``stat_object`` and ``presigned_get_object``.
        bucket: name of the bucket to scan.
        prefix: key prefix that limits the listing.

    Returns:
        A list of URLs, in listing order.
    """
    object_names = (
        entry.object_name
        for entry in client.list_objects(bucket, prefix=prefix, recursive=True)
    )
    return [
        client.presigned_get_object(bucket, name)
        for name in object_names
        # Open question: the size check only filters out empty objects; it
        # does not tell us whether an object is actually publicly readable.
        if client.stat_object(bucket, name).size > 0
    ]
# No credentials are supplied, so this client talks to the object store
# anonymously; `secure=False` matches the explicit port 80 endpoint.
client = Minio(
    "ossapi.oceaninfohub.org:80",
    secure=False,
)
def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query and return its bindings as a pandas DataFrame.

    Args:
        service: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        A DataFrame with one column per variable listed in the response
        header and one row per result binding. A variable that is missing
        from a binding yields ``None`` in that cell.
    """
    endpoint = SPARQLWrapper(service)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)

    # Parse the raw JSON response body ourselves.
    payload = json.load(endpoint.query().response)

    columns = payload["head"]["vars"]
    rows = [
        [binding.get(column, {}).get("value") for column in columns]
        for binding in payload["results"]["bindings"]
    ]
    return pd.DataFrame(rows, columns=columns)
About¶
This is the introduction to the Ocean InfoHub Release Graph.
In addition to this HTML file, we want to package:
- a PDF version of this document
- the graphs
- the original Jupyter Notebook that builds the HTML and PDF
- any JSON-LD frames or SHACL files used in generating this document
Resource Links¶
This is our first section. We use so-called Jinja Markdown here, which lets us combine Markdown with Python variables and makes for a more dynamic report.
For example, we can print the pandas version like this: 1.5.3.
Providers¶
Graph Links¶
| provider by graph alias | size | date | link |
|---|---|---|---|
| africaioc | 707451 | 2023-03-23 17:18:18+00:00 | summonedafricaioc_v1_release.nq |
| aquadocs | 211713813 | 2023-03-23 17:18:18+00:00 | summonedaquadocs_v1_release.nq |
| cioos | 42653232 | 2023-03-23 17:18:23+00:00 | summonedcioos_v1_release.nq |
| edmerp | 17165450 | 2023-03-23 17:18:24+00:00 | summonededmerp_v1_release.nq |
| edmo | 21312369 | 2023-03-23 17:18:25+00:00 | summonededmo_v1_release.nq |
| emodnet | 312445 | 2023-03-23 17:18:26+00:00 | summonedemodnet_v1_release.nq |
| inanodc | 120868 | 2023-03-23 17:18:26+00:00 | summonedinanodc_v1_release.nq |
| invemardocuments | 30805743 | 2023-03-23 17:18:27+00:00 | summonedinvemardocuments_v1_release.nq |
| invemarexperts | 3337999 | 2023-03-23 17:18:28+00:00 | summonedinvemarexperts_v1_release.nq |
| invemarinstitutions | 730445 | 2023-03-23 17:18:28+00:00 | summonedinvemarinstitutions_v1_release.nq |
| invemartraining | 1435575 | 2023-03-23 17:18:28+00:00 | summonedinvemartraining_v1_release.nq |
| invemarvessels | 272856 | 2023-03-23 17:18:29+00:00 | summonedinvemarvessels_v1_release.nq |
| marinetraining | 2618862 | 2023-03-23 17:18:29+00:00 | summonedmarinetraining_v1_release.nq |
| obis | 43477772 | 2023-03-23 17:18:29+00:00 | summonedobis_v1_release.nq |
| obps | 12110954 | 2023-03-23 17:18:30+00:00 | summonedobps_v1_release.nq |
| oceanexperts | 180515480 | 2023-03-23 17:18:31+00:00 | summonedoceanexperts_v1_release.nq |
# Collect URLs for the non-empty objects under the "graph" prefix of the "public" bucket.
urls = publicurls(client, "public", "graph")
Tabset¶
First Tab¶
Content of this first section will be generated into the first tab content.
Second Tab¶
Same goes for the second section.
# Count how often each predicate occurs across all triples in the graph.
rq_pcount = """SELECT ?p (COUNT(?p) as ?pCount)
WHERE
{
?s ?p ?o .
}
GROUP BY ?p
"""

# Run the query, cast the counts to integers, and order the predicates from
# most to least frequent.
dfc = get_sparql_dataframe(sparqlep, rq_pcount)
dfc["pCount"] = dfc["pCount"].astype(int)
dfc_sorted = dfc.sort_values(by="pCount", ascending=False)
countByLicense.rq¶
| | p | pCount |
|---|---|---|
| 154 | http://www.w3.org/1999/02/22-rdf-syntax-ns#type | 7914266 |
| 75 | http://www.w3.org/ns/prov#value | 2554814 |
| 74 | http://www.w3.org/ns/prov#used | 1277407 |
| 73 | http://www.w3.org/ns/prov#hadMember | 1277407 |
| 72 | http://www.w3.org/ns/prov#generated | 1277407 |










